
############################################################################################
############################################################################################
## Empirical Ethics                                                                       ##
## Baron & Young                                                                          ##
## ANALYSIS                                                                               ##
## May 2021                                                                               ##
############################################################################################
############################################################################################


##########
## PRELIMS
##########

rm(list=ls())

setwd('~/Dropbox/Young Baron Research Ethics/BaronYoung2021_RepPkg')

library(gtools)
library(ggplot2)
library(readxl)
library(dplyr)
library(zoo)
library(logr)
library(tidylog)


# Create temp file location
tmp <- file.path("02_Code/BaronYoung2021.log")

# Open log
lf <- log_open(tmp, autolog = TRUE, show_notes = FALSE)


stringsAsFactors=FALSE


#####
## read in data
#####

## total sample of articles
frame <- read_excel("01_Data/CodedArticles_MASTER_20200725.xlsx", sheet = "TotalArticles")

## violence articles
dat <- read_excel("01_Data/CodedArticles_MASTER_20200725.xlsx", skip = 1, sheet = "ViolenceArticles")


##########
## FIGURE 1
##########

#####
## calculate total % violence articles out of total
#####

dat$relevance <- tolower(dat$relevance)
table(dat$relevance)

t <- dat %>% 
  group_by(year, journal) %>% 
  summarize(count_all = length(citation), 
            count_nolow = length(citation[relevance!="low"]))

frame <- merge(frame, t, by = c('journal', 'year'), all.x = T)
frame$count_all[is.na(frame$count_all)==T] <- 0
frame$count_nolow[is.na(frame$count_nolow)==T] <- 0


#####
## plot % violence articles out of total
#####

t <- frame %>% 
  group_by(year) %>% 
  summarize(pct_viol_all = sum(count_all)/sum(`total # articles`),
            pct_viol_nolow = sum(count_nolow)/sum(`total # articles`))

## calculate rolling average
t <- t %>%
  arrange(year) %>%
  mutate(pct_viol_all_roll3 = rollmean(x = pct_viol_all, 3, fill = NA, align = 'center'),
         pct_viol_nolow_roll3 = rollmean(x = pct_viol_nolow, 3, fill = NA, align = 'center'))


pdf("03_Figures/ViolPct_Year.pdf")
ggplot(data = t, aes(x = year, y = pct_viol_all)) +
  geom_line() +
  geom_smooth(method = lm, colour = "black") +
  theme_minimal() +
  theme(text = element_text(size = 22)) +
  scale_x_discrete('Year', limits = seq(2008,2019,2)) +
  scale_y_continuous('Percent of All Published Articles', limits = c(0.01,0.075), labels = scales::percent)
dev.off()



##########
## PREP COMBINED DATASET
##########

#####
## make harmonized versions of codings
#####

## create coder 1 and coder 2 ID

dat$Coder_1 <- ifelse(is.na(dat$IRBNo_AB)==F, 'AB',
                      ifelse(is.na(dat$IRBNo_RR)==F, "RR",
                             ifelse(is.na(dat$IRBNo_KM)==F, "KM",
                                    ifelse(is.na(dat$IRBNo_AP)==F, "AP",
                                           "NW"))))
dat$Coder_2 <- ifelse(is.na(dat$IRBNo_AB)==F, 
                      ifelse(is.na(dat$IRBNo_RR)==F, 'RR',
                             ifelse(is.na(dat$IRBNo_KM)==F, "KM",
                                    ifelse(is.na(dat$IRBNo_AP)==F, "AP",
                                           "NW"))),
                      ifelse(is.na(dat$IRBNo_KM)==F, "KM",
                             ifelse(is.na(dat$IRBNo_AP)==F, "AP",
                                    "NW")))
dat$Coder_2[dat$Coder_1==dat$Coder_2] <- 'None'


## IRB number

dat$IRBNo_1 <- NA
dat$IRBNo_1[dat$Coder_1=="AB"] <- dat$IRBNo_AB[dat$Coder_1=="AB"]
dat$IRBNo_1[dat$Coder_1=="AP"] <- dat$IRBNo_AP[dat$Coder_1=="AP"]
dat$IRBNo_1[dat$Coder_1=="KM"] <- dat$IRBNo_KM[dat$Coder_1=="KM"]
dat$IRBNo_1[dat$Coder_1=="NW"] <- dat$IRBNo_NW[dat$Coder_1=="NW"]
dat$IRBNo_1[dat$Coder_1=="RR"] <- dat$IRBNo_RR[dat$Coder_1=="RR"]

dat$IRBNo_2 <- NA
dat$IRBNo_2[dat$Coder_2=="AB"] <- dat$IRBNo_AB[dat$Coder_2=="AB"]
dat$IRBNo_2[dat$Coder_2=="AP"] <- dat$IRBNo_AP[dat$Coder_2=="AP"]
dat$IRBNo_2[dat$Coder_2=="KM"] <- dat$IRBNo_KM[dat$Coder_2=="KM"]
dat$IRBNo_2[dat$Coder_2=="NW"] <- dat$IRBNo_NW[dat$Coder_2=="NW"]
dat$IRBNo_2[dat$Coder_2=="RR"] <- dat$IRBNo_RR[dat$Coder_2=="RR"]

table(dat$IRBNo_1, useNA='ifany')
table(dat$IRBNo_2, useNA='ifany')


## Risk assessment

dat$RiskAssess_1 <- NA
dat$RiskAssess_1[dat$Coder_1=="AB"] <- dat$RiskAssess_AB[dat$Coder_1=="AB"]
dat$RiskAssess_1[dat$Coder_1=="AP"] <- dat$RiskAssess_AP[dat$Coder_1=="AP"]
dat$RiskAssess_1[dat$Coder_1=="KM"] <- dat$RiskAssess_KM[dat$Coder_1=="KM"]
dat$RiskAssess_1[dat$Coder_1=="NW"] <- dat$RiskAssess_NW[dat$Coder_1=="NW"]
dat$RiskAssess_1[dat$Coder_1=="RR"] <- dat$RiskAssess_RR[dat$Coder_1=="RR"]

dat$RiskAssess_2 <- NA
dat$RiskAssess_2[dat$Coder_2=="AB"] <- dat$RiskAssess_AB[dat$Coder_2=="AB"]
dat$RiskAssess_2[dat$Coder_2=="AP"] <- dat$RiskAssess_AP[dat$Coder_2=="AP"]
dat$RiskAssess_2[dat$Coder_2=="KM"] <- dat$RiskAssess_KM[dat$Coder_2=="KM"]
dat$RiskAssess_2[dat$Coder_2=="NW"] <- dat$RiskAssess_NW[dat$Coder_2=="NW"]
dat$RiskAssess_2[dat$Coder_2=="RR"] <- dat$RiskAssess_RR[dat$Coder_2=="RR"]

table(dat$RiskAssess_1, useNA='ifany')
table(dat$RiskAssess_2, useNA='ifany')


## Observed harms

dat$ObservedHarms_1 <- NA
dat$ObservedHarms_1[dat$Coder_1=="AB"] <- dat$ObservedHarms_AB[dat$Coder_1=="AB"]
dat$ObservedHarms_1[dat$Coder_1=="AP"] <- dat$ObservedHarms_AP[dat$Coder_1=="AP"]
dat$ObservedHarms_1[dat$Coder_1=="KM"] <- dat$ObservedHarms_KM[dat$Coder_1=="KM"]
dat$ObservedHarms_1[dat$Coder_1=="NW"] <- dat$ObservedHarms_NW[dat$Coder_1=="NW"]
dat$ObservedHarms_1[dat$Coder_1=="RR"] <- dat$ObservedHarms_RR[dat$Coder_1=="RR"]

dat$ObservedHarms_2 <- NA
dat$ObservedHarms_2[dat$Coder_2=="AB"] <- dat$ObservedHarms_AB[dat$Coder_2=="AB"]
dat$ObservedHarms_2[dat$Coder_2=="AP"] <- dat$ObservedHarms_AP[dat$Coder_2=="AP"]
dat$ObservedHarms_2[dat$Coder_2=="KM"] <- dat$ObservedHarms_KM[dat$Coder_2=="KM"]
dat$ObservedHarms_2[dat$Coder_2=="NW"] <- dat$ObservedHarms_NW[dat$Coder_2=="NW"]
dat$ObservedHarms_2[dat$Coder_2=="RR"] <- dat$ObservedHarms_RR[dat$Coder_2=="RR"]

table(dat$ObservedHarms_1, useNA='ifany')
table(dat$ObservedHarms_2, useNA='ifany')


## Team risk

dat$TeamRisk_1 <- NA
dat$TeamRisk_1[dat$Coder_1=="AB"] <- dat$TeamRisk_AB[dat$Coder_1=="AB"]
dat$TeamRisk_1[dat$Coder_1=="AP"] <- dat$TeamRisk_AP[dat$Coder_1=="AP"]
dat$TeamRisk_1[dat$Coder_1=="KM"] <- dat$TeamRisk_KM[dat$Coder_1=="KM"]
dat$TeamRisk_1[dat$Coder_1=="NW"] <- dat$TeamRisk_NW[dat$Coder_1=="NW"]
dat$TeamRisk_1[dat$Coder_1=="RR"] <- dat$TeamRisk_RR[dat$Coder_1=="RR"]

dat$TeamRisk_2 <- NA
dat$TeamRisk_2[dat$Coder_2=="AB"] <- dat$TeamRisk_AB[dat$Coder_2=="AB"]
dat$TeamRisk_2[dat$Coder_2=="AP"] <- dat$TeamRisk_AP[dat$Coder_2=="AP"]
dat$TeamRisk_2[dat$Coder_2=="KM"] <- dat$TeamRisk_KM[dat$Coder_2=="KM"]
dat$TeamRisk_2[dat$Coder_2=="NW"] <- dat$TeamRisk_NW[dat$Coder_2=="NW"]
dat$TeamRisk_2[dat$Coder_2=="RR"] <- dat$TeamRisk_RR[dat$Coder_2=="RR"]

table(dat$TeamRisk_1, useNA='ifany')
table(dat$TeamRisk_2, useNA='ifany')



## Consent

dat$Consent_1 <- NA
dat$Consent_1[dat$Coder_1=="AB"] <- dat$Consent_AB[dat$Coder_1=="AB"]
dat$Consent_1[dat$Coder_1=="AP"] <- dat$Consent_AP[dat$Coder_1=="AP"]
dat$Consent_1[dat$Coder_1=="KM"] <- dat$Consent_KM[dat$Coder_1=="KM"]
dat$Consent_1[dat$Coder_1=="NW"] <- dat$Consent_NW[dat$Coder_1=="NW"]
dat$Consent_1[dat$Coder_1=="RR"] <- dat$Consent_RR[dat$Coder_1=="RR"]

dat$Consent_2 <- NA
dat$Consent_2[dat$Coder_2=="AB"] <- dat$Consent_AB[dat$Coder_2=="AB"]
dat$Consent_2[dat$Coder_2=="AP"] <- dat$Consent_AP[dat$Coder_2=="AP"]
dat$Consent_2[dat$Coder_2=="KM"] <- dat$Consent_KM[dat$Coder_2=="KM"]
dat$Consent_2[dat$Coder_2=="NW"] <- dat$Consent_NW[dat$Coder_2=="NW"]
dat$Consent_2[dat$Coder_2=="RR"] <- dat$Consent_RR[dat$Coder_2=="RR"]

table(dat$Consent_1, useNA='ifany')
table(dat$Consent_2, useNA='ifany')


## Other ethics

dat$OtherEthics_1 <- NA
dat$OtherEthics_1[dat$Coder_1=="AB"] <- dat$OtherEthics_AB[dat$Coder_1=="AB"]
dat$OtherEthics_1[dat$Coder_1=="AP"] <- dat$OtherEthics_AP[dat$Coder_1=="AP"]
dat$OtherEthics_1[dat$Coder_1=="KM"] <- dat$OtherEthics_KM[dat$Coder_1=="KM"]
dat$OtherEthics_1[dat$Coder_1=="NW"] <- dat$OtherEthics_NW[dat$Coder_1=="NW"]
dat$OtherEthics_1[dat$Coder_1=="RR"] <- dat$OtherEthics_RR[dat$Coder_1=="RR"]

dat$OtherEthics_2 <- NA
dat$OtherEthics_2[dat$Coder_2=="AB"] <- dat$OtherEthics_AB[dat$Coder_2=="AB"]
dat$OtherEthics_2[dat$Coder_2=="AP"] <- dat$OtherEthics_AP[dat$Coder_2=="AP"]
dat$OtherEthics_2[dat$Coder_2=="KM"] <- dat$OtherEthics_KM[dat$Coder_2=="KM"]
dat$OtherEthics_2[dat$Coder_2=="NW"] <- dat$OtherEthics_NW[dat$Coder_2=="NW"]
dat$OtherEthics_2[dat$Coder_2=="RR"] <- dat$OtherEthics_RR[dat$Coder_2=="RR"]

table(dat$OtherEthics_1, useNA='ifany')
table(dat$OtherEthics_2, useNA='ifany')



##########
## INTER-RATER RELIABILITY
##########

irrs <- data.frame('var' = c('IRBNo', 'RiskAssess', 'ObservedHarms', 'TeamRisk', 'Consent', 'OtherEthics'),
                   'irr' = NA)

t <- table(dat$IRBNo_1, dat$IRBNo_2)
irrs$irr[irrs$var=="IRBNo"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

t <- table(dat$RiskAssess_1, dat$RiskAssess_2)
irrs$irr[irrs$var=="RiskAssess"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

t <- table(dat$ObservedHarms_1, dat$ObservedHarms_2)
irrs$irr[irrs$var=="ObservedHarms"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

t <- table(dat$TeamRisk_1, dat$TeamRisk_2)
irrs$irr[irrs$var=="TeamRisk"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

t <- table(dat$Consent_1, dat$Consent_2)
irrs$irr[irrs$var=="Consent"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

t <- table(dat$OtherEthics_1, dat$OtherEthics_2)
irrs$irr[irrs$var=="OtherEthics"] <- (t['No','No'] + t['Yes','Yes'])/sum(t)

irrs


##########
## FIGURE 2
##########

#####
## calculating % of articles that use each practice
#####

dat$IRBNo_both <- ifelse(dat$IRBNo_1=="Yes" & dat$IRBNo_2=="Yes", 1, 0)
dat$IRBNo_one <- ifelse(dat$IRBNo_1=="Yes" | dat$IRBNo_2=="Yes", 1, 0)

dat$RiskAssess_both <- ifelse(dat$RiskAssess_1=="Yes" & dat$RiskAssess_2=="Yes", 1, 0)
dat$RiskAssess_one <- ifelse(dat$RiskAssess_1=="Yes" | dat$RiskAssess_2=="Yes", 1, 0)

dat$ObservedHarms_both <- ifelse(dat$ObservedHarms_1=="Yes" & dat$ObservedHarms_2=="Yes", 1, 0)
dat$ObservedHarms_one <- ifelse(dat$ObservedHarms_1=="Yes" | dat$ObservedHarms_2=="Yes", 1, 0)

dat$TeamRisk_both <- ifelse(dat$TeamRisk_1=="Yes" & dat$TeamRisk_2=="Yes", 1, 0)
dat$TeamRisk_one <- ifelse(dat$TeamRisk_1=="Yes" | dat$TeamRisk_2=="Yes", 1, 0)

dat$Consent_both <- ifelse(dat$Consent_1=="Yes" & dat$Consent_2=="Yes", 1, 0)
dat$Consent_one <- ifelse(dat$Consent_1=="Yes" | dat$Consent_2=="Yes", 1, 0)

dat$OtherEthics_both <- ifelse(dat$OtherEthics_1=="Yes" & dat$OtherEthics_2=="Yes", 1, 0)
dat$OtherEthics_one <- ifelse(dat$OtherEthics_1=="Yes" | dat$OtherEthics_2=="Yes", 1, 0)

  ## prevalence of transparency in any year
plot_one_all <- dat %>% 
  summarize('pct_IRBNo_one' = sum(IRBNo_one, na.rm=T)/length(IRBNo_one),
            'pct_RiskAssess_one' = sum(RiskAssess_one, na.rm=T)/length(RiskAssess_one),
            'pct_ObservedHarms_one' = sum(ObservedHarms_one, na.rm=T)/length(ObservedHarms_one),
            'pct_TeamRisk_one' = sum(TeamRisk_one, na.rm=T)/length(TeamRisk_one),
            'pct_Consent_one' = sum(Consent_one, na.rm=T)/length(Consent_one),
            'pct_OtherEthics_one' = sum(OtherEthics_one, na.rm=T)/length(OtherEthics_one))

  ## prevalence last five years
plot_one_recent <- dat %>% 
  filter(year > 2014) %>% 
  summarize('pct_IRBNo_one' = sum(IRBNo_one, na.rm=T)/length(IRBNo_one),
            'pct_RiskAssess_one' = sum(RiskAssess_one, na.rm=T)/length(RiskAssess_one),
            'pct_ObservedHarms_one' = sum(ObservedHarms_one, na.rm=T)/length(ObservedHarms_one),
            'pct_TeamRisk_one' = sum(TeamRisk_one, na.rm=T)/length(TeamRisk_one),
            'pct_Consent_one' = sum(Consent_one, na.rm=T)/length(Consent_one),
            'pct_OtherEthics_one' = sum(OtherEthics_one, na.rm=T)/length(OtherEthics_one))

  ## prevalence med/high relevance
plot_one_relevant <- dat %>% 
  filter(relevance %in% c('high', 'medium')) %>% 
  summarize('pct_IRBNo_one' = sum(IRBNo_one, na.rm=T)/length(IRBNo_one),
            'pct_RiskAssess_one' = sum(RiskAssess_one, na.rm=T)/length(RiskAssess_one),
            'pct_ObservedHarms_one' = sum(ObservedHarms_one, na.rm=T)/length(ObservedHarms_one),
            'pct_TeamRisk_one' = sum(TeamRisk_one, na.rm=T)/length(TeamRisk_one),
            'pct_Consent_one' = sum(Consent_one, na.rm=T)/length(Consent_one),
            'pct_OtherEthics_one' = sum(OtherEthics_one, na.rm=T)/length(OtherEthics_one))


plot <- cbind.data.frame('percent' = c(t(plot_one_all), t(plot_one_recent), t(plot_one_relevant)),
                         'Date' = factor(c(rep('All Violence Articles', 6), rep('2015-2019', 6), rep('Highest Risk', 6)),
                                         levels = c('All Violence Articles', 'Highest Risk', '2015-2019')),
                         'x' = seq(1,6,1), 
                         'var' = factor(c('IRB Number', 'Risk Assessment', 'Observed Harms',
                                                     'Team Risk', 'Consent', 'Other'),
                                        levels = c('IRB Number', 'Risk Assessment', 'Observed Harms',
                                                  'Team Risk', 'Consent', 'Other')))

pdf("03_Figures/Pct_Transparent.pdf")
ggplot(data = plot) +
  geom_col(aes(x = var, y = percent, fill = Date), position = 'dodge') +
  theme_minimal() +
  scale_fill_grey(start = 0.8, end = 0.2) +
  labs(fill = "Sample") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(text = element_text(size = 20), 
        axis.text.x=element_text(angle=45, hjust = 1, vjust = 1),
        legend.position = "bottom") +
  scale_x_discrete('Category') +
  scale_y_continuous('Percent of Violence Articles', limits = c(0,0.5), labels = scales::percent)
dev.off()

log_close()

writeLines(readLines(lf))

